Please install the following packages:
#macrosheds
devtools::install_github('https://github.com/MacroSHEDS/macrosheds.git')
#tidyverse (dplyr, stringr, readr, etc.)
install.packages('tidyverse')
Then download the MacroSheds data we will use:
library(macrosheds)
# this is where files will be downloaded. feel free to change it.
project_root <- '~/macrosheds_workshop'
# datasets we will use
ms_download_ws_attr(project_root, dataset = 'summaries')
ms_download_core_data(project_root, domains = c('niwot', 'plum', 'east_river', 'santee'))
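To confirm what landed on disk, you can list the downloaded files (plain base R; output will vary with your project_root):
list.files(project_root, recursive = TRUE) #inspect the downloaded file tree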
These packages are optional (only used here and there, and the overall sequence doesn’t depend on them)
install.packages('xts') # time series representation
install.packages('dygraphs') # interactive plots
install.packages('factoextra') # ordination, etc.
install.packages('mapview') # interactive maps
install.packages('data.table') # efficient computation
install.packages('whitebox') # geospatial processing
whitebox::install_whitebox() #additional whitebox binaries. if this fails, use the next line
# whitebox::install_whitebox(platform = 'linux_musl')
long-term watershed ecosystem studies, harmonized
suppressPackageStartupMessages(library(tidyverse))
library(macrosheds)
macrosheds v1.0.2: Up to date ✓
This package is licensed under MIT, but licensing of MacroSheds data is more complex. See
https://docs.google.com/document/d/1CPaQ705QyoWfu6WjA4xHgQooCQ8arq3NZ8DO9tb9jZ0/edit?usp=sharing
Complete metadata: https://portal.edirepository.org/nis/mapbrowse?scope=edi&identifier=1262
project_root <- '~/macrosheds_workshop'
ms_sites <- ms_load_sites() %>%
filter(site_type == 'stream_gauge') #some precip gauges have the same names as stream gauges!
?ms_download_ws_attr
# ms_download_ws_attr(project_root, dataset = 'summaries')
?ms_load_product
ws_smry <- ms_load_product(project_root, prodname = 'ws_attr_summaries')
Files read: ws_attr_summaries.csv, variable_category_codes_ws_attr.csv, variable_data_source_codes_ws_attr.csv
suppressPackageStartupMessages(library(factoextra))
pca_data <- ws_smry %>%
select(where( ~sum(is.na(.)) / length(.) < 0.3)) %>% #drop columns with >= 30% missing values
drop_na() #drop rows with any missing values
domains <- pca_data$domain
pca_data <- pca_data %>%
select(-site_code, -domain, -network) %>%
select(where(~sd(., na.rm = TRUE) != 0)) %>% #drop columns with no variance
as.matrix()
smry_categories <- substr(colnames(pca_data), 1, 1) #first character of each column name encodes its attribute category
category_map <- c('c' = 'climate', 'h' = 'hydrology', 'l' = 'landcover',
'p' = 'parentmat', 't' = 'terrain', 'v' = 'vegetation',
'w' = 'ws area')
smry_categories <- factor(category_map[match(smry_categories, names(category_map))])
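A quick sanity check that every column prefix mapped to a known category (any NAs here would mean an unmapped prefix):
table(smry_categories, useNA = 'ifany')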
pca <- prcomp(pca_data, center = TRUE, scale. = TRUE)
fviz_eig(pca)
fviz_pca_biplot(pca, geom.var = 'arrow', geom.ind = 'point', title = '',
col.var = smry_categories, palette = 'Dark2')
fviz_pca_biplot(pca, geom.var = '', geom.ind = 'point', title = '',
col.ind = as.factor(domains))
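To see which attributes drive the first component, inspect the loadings directly (base R; nothing MacroSheds-specific):
head(sort(abs(pca$rotation[, 'PC1']), decreasing = TRUE), 10) #top 10 contributors to PC1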
elev_extreme_domains <- ws_smry %>%
filter(! is.na(te_elev_mean)) %>% #no watersheds delineated for McMurdo LTER
group_by(domain) %>%
summarize(domain_mean_elev = mean(te_elev_mean)) %>%
ungroup() %>%
arrange(desc(domain_mean_elev)) %>%
slice(c(1:2, (n() - 1):n())) %>% #2 highest and 2 lowest domains, by average watershed elevation
print() %>%
pull(domain)
# A tibble: 4 × 2
domain domain_mean_elev
<chr> <dbl>
1 niwot 3593.
2 east_river 3344.
3 plum 31.7
4 santee 9.10
?ms_load_variables
sitevars <- ms_load_variables('timeseries_by_site')
sitevars <- filter(sitevars,
domain %in% elev_extreme_domains,
chem_category == 'stream_conc')
length(unique(sitevars$site_code)) # n sites
[1] 43
sitevars <- sitevars %>%
mutate(ndays = difftime(last_record_utc, first_record_utc, units = 'days')) %>%
filter(ndays >= 2 * 365.25, #at least two years of record
mean_obs_per_day * 365.25 >= 36) #and at least ~36 observations per year (about 3 per month)
get_shared_domain_vars(sitevars)
[1] "Cl" "DOC" "PO4_P" "SO4_S" "TDN"
?ms_download_core_data
# ms_download_core_data(project_root, domains = elev_extreme_domains)
?ms_load_product
(doc <- ms_load_product(
project_root,
prodname = 'stream_chemistry',
filter_vars = 'DOC',
domain = elev_extreme_domains,
warn = FALSE
))
# A tibble: 162,001 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 2015-11-09 00:00:00 Avery GN_DOC 0.38 0 0 0.0001
2 2015-11-10 00:00:00 Avery GN_DOC 0.374 0 1 0.0001
3 2015-11-11 00:00:00 Avery GN_DOC 0.367 0 1 0.0001
4 2015-11-12 00:00:00 Avery GN_DOC 0.361 0 1 0.0001
5 2015-11-13 00:00:00 Avery GN_DOC 0.354 0 1 0.0001
6 2015-11-14 00:00:00 Avery GN_DOC 0.348 0 1 0.0001
7 2015-11-15 00:00:00 Avery GN_DOC 0.341 0 1 0.0001
8 2015-11-16 00:00:00 Avery GN_DOC 0.335 0 1 0.0001
9 2015-11-17 00:00:00 Avery GN_DOC 0.329 0 1 0.0001
10 2015-11-18 00:00:00 Avery GN_DOC 0.322 0 1 0.0001
# ℹ 161,991 more rows
unique(doc$var)
[1] "GN_DOC"
table(doc$ms_status)
0 1
161930 71
table(doc$ms_interp)
0 1
126140 35861
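ms_interp == 1 flags values that MacroSheds gap-filled by interpolation. We keep them below, but if you want only direct observations, drop them; a minimal example (doc_observed is just an illustrative name):
doc_observed <- filter(doc, ms_interp == 0) #direct observations only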
doc_wide <- doc %>%
filter(ms_status == 0) %>% #remove ms_status == 1 (questionable)
select(datetime, site_code, val) %>%
left_join(select(ms_sites, site_code, domain), #join column of domain names
by = 'site_code') %>%
filter(! is.na(domain)) %>% #some site data is missing in MS version 1 (whoops)
pivot_wider(names_from = c(domain, site_code),
values_from = val,
names_sep = '__') %>% #column names are of the form <domain>__<site_code>
arrange(datetime) %>% #make sure it's all sorted chronologically
complete(datetime = seq(first(datetime), last(datetime), by = 'day')) #insert explicit NA rows for missing days
suppressPackageStartupMessages(library(xts))
library(dygraphs)
dmn_colors <- factor(str_split_fixed(colnames(doc_wide)[-1], '__', n = 2)[, 1])
levels(dmn_colors) <- hcl.colors(length(elev_extreme_domains))
dg <- dygraph(xts(x = doc_wide[, -1], order.by = doc_wide$datetime)) %>%
dyRangeSelector()
for(i in 2:ncol(doc_wide)){
dg <- dySeries(dg, colnames(doc_wide)[i], color = as.character(dmn_colors[i - 1]))
}
dg
#exercise: compute the coefficient of variation (CV) of DOC for each site,
#join the watershed attribute summaries, and use glmnet to identify
#attributes that predict DOC variability
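One possible shape for that exercise, sketched under the assumption that the glmnet package is installed (the predictor handling here is illustrative, not definitive):
library(glmnet)
#coefficient of variation of DOC at each site
site_cv <- doc %>%
    filter(ms_status == 0) %>%
    group_by(site_code) %>%
    summarize(doc_cv = sd(val, na.rm = TRUE) / mean(val, na.rm = TRUE))
#join numeric watershed attributes; drop incomplete rows and constant columns
model_data <- site_cv %>%
    inner_join(ws_smry, by = 'site_code') %>%
    select(where(is.numeric)) %>%
    drop_na() %>%
    select(where(~sd(.) > 0))
x <- as.matrix(select(model_data, -doc_cv))
y <- model_data$doc_cv
cvfit <- cv.glmnet(x, y) #lasso with cross-validated lambda
coef(cvfit, s = 'lambda.min') #nonzero coefficients = selected attributes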
NO3_N_conc <- ms_load_product(
project_root,
prodname = 'stream_chemistry',
filter_vars = 'NO3_N',
domain = 'niwot',
warn = FALSE
)
Q <- ms_load_product(
project_root,
prodname = 'discharge',
domain = 'niwot',
warn = FALSE
)
NO3_N_conc <- semi_join(NO3_N_conc, Q, by = 'site_code') #keep only chemistry sites that also have discharge records
#before
filter(NO3_N_conc, datetime == as.POSIXct('1985-05-09 00:00:00', tz = 'UTC'), site_code == 'ALBION')
# A tibble: 1 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1985-05-09 00:00:00 ALBION GN_NO3_N 0.182 0 0 0.01
NO3_N_flux <- suppressPackageStartupMessages({
ms_calc_flux(NO3_N_conc, Q, q_type = 'discharge')
})
[1] "q dataset has a daily interval"
#after
filter(NO3_N_flux, datetime == as.POSIXct('1985-05-09 00:00:00', tz = 'UTC'), site_code == 'ALBION')
# A tibble: 1 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1985-05-09 00:00:00 ALBION GN_NO3_N 0.00113 0 0 0.0000621
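Conceptually (a sketch, not the package's exact internals): MacroSheds stores concentration in mg/L and discharge in L/s, so a daily flux in kg/ha/d is their product, converted to kg/d and normalized by watershed area.
#kg/ha/d = mg/L * L/s * 86400 s/d * 1e-6 kg/mg / area_ha
flux_kg_ha_d <- function(conc_mgL, q_Ls, area_ha){
    conc_mgL * q_Ls * 86400 * 1e-6 / area_ha
}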
# this function will be officially included in the macrosheds package when we
# release version 2 of the MacroSheds dataset, which will include monthly and
# yearly load estimates based on Gubbins et al. in prep. For now, you can call it
# with `:::`, which accesses "internal" package functions.
NO3_N_load <- macrosheds:::ms_calc_flux_rsfme(
NO3_N_conc, Q,
method = 'beale',
aggregation = 'annual'
)
ms_run_egret(stream_chemistry = filter(NO3_N_conc, site_code == 'ALBION'),
discharge = filter(Q, site_code == 'ALBION'))
(kg_ha_d <- slice(NO3_N_flux, 1:3))
# A tibble: 3 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1985-05-09 00:00:00 ALBION GN_NO3_N 0.00113 0 0 0.0000621
2 1985-05-10 00:00:00 ALBION GN_NO3_N 0.00180 0 1 0.000107
3 1985-05-11 00:00:00 ALBION GN_NO3_N 0.00101 0 1 0.0000654
(kg_d <- ms_undo_scale_flux_by_area(kg_ha_d))
# A tibble: 3 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1985-05-09 00:00:00 ALBION GN_NO3_N 0.813 0 0 0.0000621
2 1985-05-10 00:00:00 ALBION GN_NO3_N 1.29 0 1 0.000107
3 1985-05-11 00:00:00 ALBION GN_NO3_N 0.724 0 1 0.0000654
(kg_ha_d <- ms_scale_flux_by_area(kg_d))
# A tibble: 3 × 7
datetime site_code var val ms_status ms_interp val_err
<dttm> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1985-05-09 00:00:00 ALBION GN_NO3_N 0.00113 0 0 0.0000621
2 1985-05-10 00:00:00 ALBION GN_NO3_N 0.00180 0 1 0.000107
3 1985-05-11 00:00:00 ALBION GN_NO3_N 0.00101 0 1 0.0000654
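The arithmetic ties out: dividing the kg/d value by the kg/ha/d value in the first row recovers the watershed area these two functions scale by.
0.813 / 0.00113 #~719: ALBION's watershed area in hectares, as implied by the rows above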
#NOTE: not every function performs error computations yet (note that val_err above did not rescale with val)
?ms_generate_attribution
attrib <- ms_generate_attribution(doc, chem_source = 'stream')
ls.str(attrib)
acknowledgements : chr "Primary data were provided by the following sources:\n1. East River SFA DOE (Funded by the Department of Energy"| __truncated__
bibliography : chr [1:120] "@article{vlah_etal_macrosheds_2023,\n\tauthor = {Vlah, Michael J. and Rhea, Spencer and Bernhardt, Emily S. and"| __truncated__ ...
full_details_timeseries : tibble [101 × 27] (S3: tbl_df/tbl/data.frame)
full_details_ws_attr : tibble [28 × 8] (S3: tbl_df/tbl/data.frame)
intellectual_rights_explanations : chr [1:4] "A share-alike license means derivative works must propagate the original license terms. If may_disregard_with_p"| __truncated__ ...
intellectual_rights_notifications : List of 4
$ sharealike_license : tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
$ notify_of_intent_M : tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
$ notify_on_distribution_M: tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
$ provide_access_M : tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
attrib$intellectual_rights_notifications
$sharealike_license
# A tibble: 1 × 5
network domain macrosheds_prodname may_disregard_with_permission contact
<chr> <chr> <chr> <lgl> <chr>
1 <NA> <NA> tcw (watershed attribute) FALSE https:…
$notify_of_intent_M
# A tibble: 1 × 4
network domain macrosheds_prodname contact
<chr> <chr> <chr> <chr>
1 lter plum stream_chemistry pie_im@mbl.edu
$notify_on_distribution_M
# A tibble: 1 × 4
network domain macrosheds_prodname contact
<chr> <chr> <chr> <chr>
1 lter plum stream_chemistry pie_im@mbl.edu
$provide_access_M
# A tibble: 1 × 4
network domain macrosheds_prodname contact
<chr> <chr> <chr> <chr>
1 lter plum stream_chemistry pie_im@mbl.edu
attrib$intellectual_rights_explanations
[1] "A share-alike license means derivative works must propagate the original license terms. If may_disregard_with_permission is TRUE, you may ask the primary source contact for permission to use your own license."
[2] "notify_of_intent_M means the primary source requires notice of any plans to publish derivative works that use their data."
[3] "notify_on_distribution_M means the primary source requires that they be informed of any publications resulting from their data."
[4] "provide_access_M means the primary source requires online access to any publications resulting from their data."
t(attrib$full_details_timeseries[1, ])
[,1]
domain "east_river"
network "doe"
macrosheds_prodcode "VERSIONLESS008"
macrosheds_prodname "ws_boundary"
doi "10.21952/WTR/1508403"
data_status NA
license "https://creativecommons.org/licenses/by/4.0/"
license_type "Attribution"
license_sharealike NA
IR_acknowledgement_text NA
IR_acknowledge_domain NA
IR_acknowledge_funding_sources NA
IR_acknowledge_grant_numbers NA
IR_notify_of_intentions NA
IR_notify_on_distribution NA
IR_provide_online_access NA
IR_provide_two_reprints NA
IR_collaboration_consultation NA
IR_questions NA
IR_needs_clarification NA
contact "rosemary.carroll@dri.edu"
contact_name1 "Rosemary Carroll"
creator_name1 "Rosemary Carroll"
funding "Funded by the Department of Energy"
citation "Carroll, R., Bill, M., Dong, W., & Williams, K. (2019). Sub-Basin Delineation for the Upper East River, Colorado, United States. Watershed Function SFA. https://doi.org/10.21952/WTR/1508403"
link "https://data.ess-dive.lbl.gov/catalog/d1/mn/v2/packages/application%2Fbagit-097/ess-dive-3b9a58adc163fd4-20190509T155206477018"
link_download_datetime "2022-05-16 18:26:32 UTC"
attrib <- ms_generate_attribution(doc, chem_source = 'stream', write_to_dir = '~')
Warning in dir.create(write_to_dir):
'/home/mike/macrosheds_attribution_information' already exists
Output files written to ~/macrosheds_attribution_information/
read_file('~/macrosheds_attribution_information/ms_bibliography.bib') %>%
substr(1, 1092) %>%
cat()
@article{vlah_etal_macrosheds_2023,
author = {Vlah, Michael J. and Rhea, Spencer and Bernhardt, Emily S. and Slaughter, Weston and Gubbins, Nick and DelVecchia, Amanda G. and Thellman, Audrey and Ross, Matthew R. V.},
title = {MacroSheds: A synthesis of long-term biogeochemical, hydroclimatic, and geospatial data from small watershed ecosystem studies},
journal = {Limnology and Oceanography Letters},
volume = {8},
number = {3},
pages = {419-452},
doi = {https://doi.org/10.1002/lol2.10325},
url = {https://aslopubs.onlinelibrary.wiley.com/doi/abs/10.1002/lol2.10325},
year = {2023},
}
@article{carroll_sub-basin_2019,
title = {Sub-{Basin} {Delineation} for the {Upper} {East} {River}, {Colorado}, {United} {States}},
doi = {10.21952/WTR/1508403},
journal = {Watershed Function SFA},
author = {Carroll, R. and Bill, M. and Dong, W. and Williams, K.},
year = {2019},
}
@misc{usda_forest_service_southern_region_francis_2011,
title = {Francis {Marion} \& {Sumter} {National} {Forests}: {SEF}\_Boundary},
author = {{USDA Forest Service, Southern Region}},
year = {2011},
}
Watershed delineation (interactive, so output is not included in the HTML version of this document) can work where StreamStats fails, especially in small watersheds.
whitebox::install_whitebox() #if this fails, use the next line
# whitebox::install_whitebox(platform = 'linux_musl')
tmp <- tempdir()
out <- ms_delineate_watershed(
lat = 44.21013,
long = -122.2571,
crs = 4326,
write_dir = tmp,
write_name = 'example_site'
)
str(out) #specifications of your successful delineation
(watershed boundaries, precip gauge locations, stream gauge locations)
suppressPackageStartupMessages(library(mapview))
ws_bound <- ms_load_spatial_product(project_root, 'ws_boundary', domain = 'boulder')
prcp_gauges <- ms_load_spatial_product(project_root, 'precip_gauge_locations', domain = 'boulder')
strm_gauges <- ms_load_spatial_product(project_root, 'stream_gauge_locations', domain = 'boulder')
mapview(ws_bound) + mapview(prcp_gauges) + mapview(strm_gauges, col.regions = rainbow(n = 3))
Vignettes will only load if you installed macrosheds with build_vignettes = TRUE, but they're also hosted as markdown files in the MacroSheds GitHub repository.
These provide tutorials on data retrieval, flux calculation, watershed
delineation, and precip interpolation.
vignette(package = 'macrosheds')
vignette('ms_watershed_delineation', package = 'macrosheds')
?ms_synchronize_timestep # upsample or downsample macrosheds data
?ms_calc_watershed_precip # spatial interpolation of precip gauge data
?ms_separate_baseflow # baseflow/stormflow separation via hydrostats